#coding:utf-8

import sys
import re
import urllib2

# Python 2 only: reloading ``sys`` makes ``sys.setdefaultencoding`` callable
# again, so the default codec used for implicit str/unicode conversions can
# be switched to UTF-8. The reload must come before the call.
reload(sys)
sys.setdefaultencoding("utf-8")


def get_wooyun_info(url, timeout=30):
    """Fetch one listing page and print the bug rows found in it.

    The page is requested with a browser-like User-Agent, carriage returns
    and newlines are stripped so that each table row sits on a single line,
    and every ``<tr>`` matching the row pattern is printed as one line of
    four space-separated fields: the href and text of the first link in the
    ``<td>`` cell, the href of the next link, and the text of the following
    link that closes a ``<th>`` cell.

    Args:
        url: Address of the listing page to download.
        timeout: Seconds to wait on the network before giving up. Defaults
            to 30 so a stalled server cannot hang the script indefinitely.

    Returns:
        None. Matches are written to standard output, one row per line.

    Raises:
        Whatever ``urllib2.urlopen`` raises (for example
        ``urllib2.URLError``) propagates to the caller unchanged.
    """
    request = urllib2.Request(url)
    request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36')

    response = urllib2.urlopen(request, timeout=timeout)
    try:
        raw_html = response.read()
    finally:
        # Always release the connection, even if read() fails part-way.
        response.close()

    # Drop line breaks so the non-greedy '.' in the pattern can span what
    # was originally a multi-line <tr> element.
    html_content = raw_html.replace('\r', '').replace('\n', '')

    # Row pattern: four capture groups per <tr> (see docstring).
    row_pattern = r'<tr>.*?<td>.*?href="(.*?)">(.*?)</a>.*?<a.*?href="(.*?)">.*?">(.*?)</a></th>.*?</tr>'

    # findall yields one 4-tuple of captured strings per matching row.
    for row in re.findall(row_pattern, html_content):
        print(' '.join(row))

# Crawl listing pages 1 through 3 of the newly submitted bugs.
base_url = 'http://www.wooyun.org/bugs/new_submit/page/'
for page_no in range(1, 4):
    get_wooyun_info(base_url + str(page_no))



# Reference sample of one <tr> row of the kind the regex in get_wooyun_info
# is written against. This bare string is never assigned or used at runtime.
'''
<tr>
                    <th>2015-04-20</th>
                    <td><a href="/bugs/wooyun-2015-0109129">国家互联网应急中心某外部探测系统配置不当可命令执行</a>
                                                            </td>
                    <th><a title="评论一下" href="/bugs/wooyun-2015-0109129#comment">5/25</a></th>
                    <th><a title="fuckadmin" href="/whitehats/fuckadmin">fuckad...</a></th>
                </tr>
'''